##
## Script name: MCA analisi azaldua
##
## Purpose of script: MCA analisia egitea eta horren osagaiak nabarmentzea
##
## Author: Juan
##
## Date Created: 2019-05-15
##
## Email: juan.abasolo@ehu.eus
##
##
## Notes:
##
##
## load up the packages we will need: (uncomment as required)
require(tidyverse)
## Loading required package: tidyverse
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## ── Attaching packages ────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
require(data.table)
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
require(factoextra)
## Loading required package: factoextra
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
require(FactoMineR)
## Loading required package: FactoMineR
require(stringr)
require(scatterplot3d)
## Loading required package: scatterplot3d
# source('') # loads up all the packages we need
## Datuak sartu
dtk <- read.table('data/raw/EAS 1t2 Arratia V0-08-1.csv', sep = ',', header = T, quote = '\"', stringsAsFactors = F)
dtk$Galdera <- factor(dtk$Galdera)
dtk$Herria <- factor(dtk$Herria)
dtk$Inkesta <- factor(dtk$Inkesta)
dtk$Herria.1 <- factor(dtk$Herria.1)
dtk$Belaunaldia <- factor(dtk$Belaunaldia)
dtk <- dtk[-which(dtk$Kendu=='ezabatu'),]
dtk$Kendu <- NULL
dtk$Unit <- paste(dtk$Herria.1, dtk$Belaunaldia, sep = ' ')
dtk$Unit2 <- paste(dtk$Herria, dtk$Inkesta, sep = ' ')
dtk$col <- NA
dtk[which(dtk$Inkesta == 'Estandarra'), 'col'] <- 2
dtk[which(dtk$Inkesta == 'EAS1'), 'col'] <- 4
dtk[which(dtk$Inkesta == 'EAS2'), 'col'] <- 3
## Datuen hasiera eta amaiera erakutsi
knitr::kable(rbind(head(dtk), tail(dtk)),
caption = 'Datuen hasiera eta amaiera')
| Galdera | Herria | Fon | Orto | Lema | Inkesta | Oharrak | Herria.1 | Belaunaldia | Unit | Unit2 | col | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 7060 | 1 | ilargi | Estandarra | estandarra | Eskola | estandarra Eskola | 1 Estandarra | 2 | |||
| 2 | 7060 | 111 | ilarɣi | ilargi | ilargi | EAS1 | Zeberio | Heldu | Zeberio Heldu | 111 EAS1 | 4 | |
| 3 | 7060 | 111 | iˈlarɣi | iˈlargi | ilargi | EAS2 | eas2sartubarri | Zeberio | Gazte | Zeberio Gazte | 111 EAS2 | 3 |
| 4 | 7060 | 116 | iʎár̄ɣi | illárgi | ilargi | EAS1 | Lemoa | Heldu | Lemoa Heldu | 116 EAS1 | 4 | |
| 5 | 7060 | 116 | iʎarˈɣi | illarrˈgi | ilargi | EAS2 | eas2sartubarri | Lemoa | Gazte | Lemoa Gazte | 116 EAS2 | 3 |
| 6 | 7060 | 117 | iðér̄ɣi | idérgi | idergi | EAS1 | Dima | Heldu | Dima Heldu | 117 EAS1 | 4 | |
| 1755 | 94460 | 116 | beré | berré | bere | EAS1 | Lemoa | Heldu | Lemoa Heldu | 116 EAS1 | 4 | |
| 1756 | 94460 | 116 | βeɾe | bere | bere | EAS2 | Lemoa | Gazte | Lemoa Gazte | 116 EAS2 | 3 | |
| 1757 | 94460 | 117 | beren | berren | beren | EAS1 | Dima | Heldu | Dima Heldu | 117 EAS1 | 4 | |
| 1758 | 94460 | 117 | βere | berre | bere | EAS2 | Dima | Gazte | Dima Gazte | 117 EAS2 | 3 | |
| 1759 | 94460 | 118 | βeɾe | bere | bere | EAS1 | Zeanuri | Heldu | Zeanuri Heldu | 118 EAS1 | 4 | |
| 1760 | 94460 | 118 | βeɾe | bere | bere | EAS2 | Zeanuri | Gazte | Zeanuri Gazte | 118 EAS2 | 3 |
## Datuak antolatu
## Aukeratu intereseko zutabeak eta zabalaetarako taula sortu
dtk.x <- dtk[,c("Unit", "Galdera","Lema", 'col')]
dtk.x <- spread(dtk.x, key = Galdera, value = Lema)
## Datu hutsuneak 0akin bete
DT.for.set.sqln <- function(x) { for (j in seq_len(ncol(x)))
set(x,which(is.na(x[[j]])),j,0) }
suppressWarnings(DT.for.set.sqln(dtk.x))
## Balio bitarretara pasatu
dtk.x.plus <- dtk.x%>%gather(caracteristica, valor, 2:length(names(dtk.x)))%>%
separate(valor, sep = ',', c("v1", "v2", "v3"))%>%
gather(v, valor, 3:5)%>%
filter(!is.na(valor))%>%
mutate(categoria_bin=paste(caracteristica, "-", valor, sep = ''))%>%
select(Unit, categoria_bin)%>%
mutate(value = 1)%>%
spread(categoria_bin, value, fill= 0)
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 1797
## rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
## 20, ...].
for(i in names(dtk.x.plus)){
# print(i)
dtk.x.plus[,i] <- factor(dtk.x.plus[,i])
}
row.names(dtk.x.plus) <- dtk.x.plus$Unit
dtk.x.plus <- dtk.x.plus[,-c(1, 564:566)]
## Identifikau datu falten aldagaiak
kentzeko <- str_which(names(dtk.x.plus), '-0')
dtk.x.plus <- dtk.x.plus[,-kentzeko]
## Koloreetarako aldagaia barriro sartu
# Kasu! col aldagai hori ez da faktorea; beraz, analisitik kendu beharra dago.
dtk.x.plus$col <- dtk.x$col
## Datuen analisia
arratia.eas.mca <- MCA(dtk.x.plus[,-522], ncp = 4, graph = T)
## Dimentsioek zenbat azaltzen duten
fviz_screeplot(arratia.eas.mca, addlabels = TRUE, ylim = c(0, 35))
## Base erabilita:
barplot(arratia.eas.mca$eig[,2], col = c(rep('red',3), rep('darkblue',5)),
main = 'Info berbera irudi tuneagarriagoan',
sub = 'Baina tuneatik esan nahi jok bihar handijjagua eukiti, jakina!')
## Dimentsioak zerk osatzen dituen (herriak/lemak)
# arratia.eas.mca$var$coord
# Kontribuzioa (10 banakoak -herriak- eta 200 aldagai)
fviz_mca_biplot(arratia.eas.mca,
select.ind = list(contrib = 10),
select.var = list(contrib = 200))
Irudian argi agertzen dira irudikatuta informatzaileak (?). Aldagaiak, berriz, oso multzokatuta agertzen dira (200/522 irudikatzen dira). Koherentea da hori herri/belaunaldi banaketarekin. Aldagai askok azaltzen baitu euskara estandarretik herrietako hizkeretara dagoen aldea (%31,9), herriko hizkeren arteko aldea, berriz, beste aldagai batzuk erakusten dute, azalpen ahalmen gutxiagorekin (%12.01)
## Datuek errepresentaten dutenaren kalitatea:
## Koloriek erakusten dabe zein neurritan dagozan ondo ala txarto islatuta dimentsino horreetan
## hain daukagu aldagai asko, eze, uste dot ez dala holakorik erakutsi behar.
fviz_mca_var(arratia.eas.mca, col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping
ggtheme = theme_minimal())
Espero izatekoa den moduan, ondoen errepresentatuta agertzen dira lehenengo dimentsioan zehar banatzen direnak. Bigarren maila batean agertzen dira bigarren dimentsioan zehar errepresentatzen direnak Eta txartoen errepresentatuta daudenak 0,0 ardatzetik hurreen agertzen dira.
## Dimentsioak zerk osatzen duen, herriak eta aldagaiak
## Lehenengo dimentsioa
# Cos2 of individuals
factoextra::fviz_cos2(arratia.eas.mca, choice = "ind", axes = 1:2, top = 20)
factoextra::fviz_cos2(arratia.eas.mca, choice = "ind", axes = 2:3, top = 20)
# Contribution of individuals to the dimensions
factoextra::fviz_contrib(arratia.eas.mca, choice = "ind", axes = 1, top = 20)
factoextra::fviz_contrib(arratia.eas.mca, choice = "var", axes = 1, top = 120)
factoextra::fviz_contrib(arratia.eas.mca, choice = "ind", axes = 2, top = 20)
factoextra::fviz_contrib(arratia.eas.mca, choice = "var", axes = 2, top = 20)
factoextra::fviz_contrib(arratia.eas.mca, choice = "ind", axes = 3, top = 20)
factoextra::fviz_contrib(arratia.eas.mca, choice = "var", axes = 3, top = 20)
factoextra::fviz_contrib(arratia.eas.mca, choice = "ind", axes = 4, top = 20)
factoextra::fviz_contrib(arratia.eas.mca, choice = "var", axes = 4, top = 20)
## Dimentsio biko irudiak
tauli <- arratia.eas.mca$ind$coord
tauli <- cbind(tauli, col = dtk.x$col)
plot(tauli[,1:2],
col = tauli[,'col'],
pch = 20,
cex = 3,
xlab = paste( 'Aldakuntzen %',
round(arratia.eas.mca$eig[1,2], 2), 'azaltzen du'),
ylab = paste( 'Aldakuntzen %',
round(arratia.eas.mca$eig[2,2], 2), 'azaltzen du'),
main = 'Arratiako hizkerak belaunaldi bitan eta euskara estandarra',
sub = 'Bariazioaren azalpenaren lehenengo bi dimentsioak'
)
text(tauli[,1:2], labels = row.names(tauli), col = 'gray40')
plot(tauli[,2:3],
col = tauli[,'col'],
pch = 20,
cex = 3,
xlab = paste( 'Aldakuntzen %',
round(arratia.eas.mca$eig[2,2], 2), 'azaltzen du'),
ylab = paste( 'Aldakuntzen %',
round(arratia.eas.mca$eig[3,2], 2), 'azaltzen du'),
main = 'Arratiako hizkerak belaunaldi bitan eta euskara estandarra',
sub = 'Bariazioaren azalpenaren bigarren eta hirugarren dimentsioak'
)
text(tauli[,2:3], labels = row.names(tauli), col = 'gray40')
## 3D irudia
x <- tauli[,1]
y <- tauli[,2]
z <- tauli[,3]
sct3d <- scatterplot3d(x,
y,
z,
color = tauli[,'col'],
pch = 19,
type = 'p', # c('p', 'h', 'l')
main = "Arratiako hizkerak eta batua 3D errepresantazioan",
xlab = '1. dimentsioa',
ylab = '2. dimentsioa',
zlab = '3. dimentsioa')
s3d.coords <-sct3d$xyz.convert(x,y,z)
text(s3d.coords$x,
s3d.coords$y,
labels = row.names(tauli),
col = 'gray40',
cex = 1.2,
pos = 4)
# Beste bertsino bat
plot3D::text3D(arratia.eas.mca$ind$coord[,1],
arratia.eas.mca$ind$coord[,2],
arratia.eas.mca$ind$coord[,3],
labels = row.names(arratia.eas.mca$ind$coord),
col = rgb(scales::rescale(arratia.eas.mca$ind$coord, to = c(0,1))))
plot3Drgl::plotrgl()
x <- arratia.eas.mca$ind$coord[,1]
y <- arratia.eas.mca$ind$coord[,2]
z <- arratia.eas.mca$ind$coord[,3]
# Open a new RGL device, aurreko testudunagaz segitzeko, markata itxi
# rgl::open3d()
# rgl::rgl.open()
# rgl::rgl.bg(color = "white") # Setup the background color
rgl::rgl.spheres(data.frame(x, y, z), r = 0.02,
color = rgb(scales::rescale(arratia.eas.mca$ind$coord, to = c(0,1))))
# Add x, y, and z Axes
rgl::rgl.lines(c(min(x), max(x)), c(0, 0), c(0, 0), color = "red")
rgl::rgl.lines(c(0, 0), c(min(y), max(y)), c(0, 0), color = "green")
rgl::rgl.lines(c(0, 0), c(0, 0), c(min(z), max(z)), color = "blue")
## Puntu diagramak eta osagaien ekarpenak
## Ekarpenen taula garbitua
t.var.ekarpenak <- arratia.eas.mca$var$contrib
kentzeko <- str_which(attributes(t.var.ekarpenak)$dimnames[[1]], '_0')
t.var.ekarpenak <- t.var.ekarpenak[-kentzeko,]
# t.var.ekarpenak
attributes(t.var.ekarpenak)$dimnames[[1]] <- gsub('_1', '', attributes(t.var.ekarpenak)$dimnames[[1]])
## Irudiak
dotchart(tauli[,1:4], main = 'Lau dimentsioen banaketa', color = tauli[,"col"], cex = 0.8)
# Dimentsioka
par(mfrow = c(1, 2))
dotchart(tauli[order(tauli[,1]),1], color = tauli[order(tauli[, 1]),'col'], pch = 20,
main = paste( 'Dim 1 - Aldakuntzen %',
round(arratia.eas.mca$eig[1,2], 2)))
# knitr::kable(t.var.ekarpenak[order(t.var.ekarpenak[,1],
# decreasing = TRUE), 1][1:120])
dotchart(sort(t.var.ekarpenak[order(t.var.ekarpenak[,1],
decreasing = TRUE), 1][1:150]), cex = 0.7,
main = 'aldagaien ekarpenak')
Gehiegi dira ondo irakurri ahal izateko era honetara aurkeztuta. Hemen taulan:
knitr::kable(t.var.ekarpenak[order(t.var.ekarpenak[,1],
decreasing = TRUE), 1][1:120])
| x | |
|---|---|
| 10060-ostiral | 0.5846321 |
| 11030-herenegun | 0.5846321 |
| 11090-bart | 0.5846321 |
| 11140-arratsalde | 0.5846321 |
| 12170-eguberri | 0.5846321 |
| 16070-herdoil | 0.5846321 |
| 19460-zur | 0.5846321 |
| 21080-tipula | 0.5846321 |
| 29040-kosk egin | 0.5846321 |
| 35820-piztu | 0.5846321 |
| 36410-onil | 0.5846321 |
| 36670-aizto | 0.5846321 |
| 47050-ardo | 0.5846321 |
| 52030-ozpin | 0.5846321 |
| 54120-erratz | 0.5846321 |
| 55110-buruorratz | 0.5846321 |
| 58100-giltza | 0.5846321 |
| 58280-begiratu | 0.5846321 |
| 59110-zilbor | 0.5846321 |
| 59410-aztal | 0.5846321 |
| 59660-azazkal | 0.5846321 |
| 59720-eseri | 0.5846321 |
| 64090-oinutsik | 0.5846321 |
| 67010-jolastu | 0.5846321 |
| 70070-erren | 0.5846321 |
| 72030-hotzeri | 0.5846321 |
| 72310-konorte galdu | 0.5846321 |
| 73090-hileta | 0.5846321 |
| 73100-hilerri | 0.5846321 |
| 76060-izeba | 0.5846321 |
| 76110-amona | 0.5846321 |
| 76180-aitaginarreba | 0.5846321 |
| 76190-amaginarreba | 0.5846321 |
| 8220-trumoi | 0.5846321 |
| 8310-lehorte | 0.5846321 |
| 84440-garesti | 0.5846321 |
| 86270-herria | 0.5846321 |
| 86540-astoekin | 0.5846321 |
| 86550-astoekin | 0.5846321 |
| 86600-astoentzat | 0.5846321 |
| 86780-itsasorantz | 0.5846321 |
| 87810-Aitorri | 0.5846321 |
| 88100-gehiegi | 0.5846321 |
| 89050-hauek | 0.5846321 |
| 89080-honi | 0.5846321 |
| 89260-hura | 0.5846321 |
| 89740-nola | 0.5846321 |
| 90010-naiz | 0.5846321 |
| 90060-zarete | 0.5846321 |
| 90310-dadin | 0.5846321 |
| 90360-daiteke | 0.5846321 |
| 90410-zait | 0.5846321 |
| 90520-zitzaidan | 0.5846321 |
| 90590-zitzaizkion | 0.5846321 |
| 90700-dut | 0.5846321 |
| 90730-du | 0.5846321 |
| 90750-duzue | 0.5846321 |
| 90760-dute | 0.5846321 |
| 90780-dituzte | 0.5846321 |
| 90850-zuen | 0.5846321 |
| 91030-dezan | 0.5846321 |
| 91040-ditzan | 0.5846321 |
| 91110-dezake | 0.5846321 |
| 91240-dio | 0.5846321 |
| 91250-die | 0.5846321 |
| 91260-diogu | 0.5846321 |
| 91280-diote | 0.5846321 |
| 91320-didazu | 0.5846321 |
| 91510-diezaioke | 0.5846321 |
| 92020-zoazte | 0.5846321 |
| 92120-zaudete | 0.5846321 |
| 92270-zatozte | 0.5846321 |
| 92320-zabiltzate | 0.5846321 |
| 92450-daukate | 0.5846321 |
| 92460-dauzka | 0.5846321 |
| 92480-zeukan | 0.5846321 |
| 92530-dakarte | 0.5846321 |
| 92540-dakartza | 0.5846321 |
| 93220-dik | 0.5846321 |
| 94050-nortaz | 0.5846321 |
| 94080-etorri al da | 0.5846321 |
| 10070-larunbat | 0.5846321 |
| 35850-pospolo | 0.2479610 |
| 56040-urratu | 0.2479610 |
| 59830-jaurti | 0.2355487 |
| 69070-inbidia | 0.2355487 |
| 72330-hasperen | 0.2355487 |
| 89450-zeuk | 0.2355487 |
| 90870-zenuen | 0.2355487 |
| 14100-uholde | 0.2340670 |
| 59770-korrika | 0.2340670 |
| 49060-gurin | 0.2303718 |
| 66180-errieta egin | 0.2303718 |
| 8070-zirimola | 0.2303718 |
| 90770-ditu | 0.2303718 |
| 92040-nindoan | 0.2303718 |
| 72320-nekatu | 0.2217122 |
| 87010-zerua | 0.2217122 |
| 94320-janarazi | 0.2217122 |
| 26340-gazta | 0.2192035 |
| 71030-zaldar | 0.2192035 |
| 90890-zituen | 0.2192035 |
| 52250-haragi | 0.2109823 |
| 69080-alfer | 0.1928872 |
| 10220-abuztu | 0.1297122 |
| 94290-ahal n | 0.1297122 |
| 94250-berdin Nnori | 0.1274671 |
| 19010-zuhaitz | 0.1193496 |
| 35030-logela | 0.1193496 |
| 8250-ostadar | 0.1193496 |
| 27440-odoloste | 0.1142670 |
| 54020-errauts | 0.1107041 |
| 29030-zaunka egin | 0.1011551 |
| 11030-heregun | 0.0730790 |
| 11140-arrasti | 0.0730790 |
| 21080-kinpula | 0.0730790 |
| 35820-izetu | 0.0730790 |
| 36410-enbudo | 0.0730790 |
| 36670-kutxillo | 0.0730790 |
| 47050-ardau | 0.0730790 |
par(mfrow = c(1, 2))
dotchart(tauli[order(tauli[,2]),2], color = tauli[order(tauli[, 2]),'col'], pch = 20,
main = paste( 'DIM2 aldakuntzen %',
round(arratia.eas.mca$eig[2,2], 2)))
# knitr::kable(t.var.ekarpenak[order(t.var.ekarpenak[,2],
# decreasing = TRUE), 2][1:50])
dotchart(sort(t.var.ekarpenak[order(t.var.ekarpenak[,2],
decreasing = TRUE), 2][1:50]), cex = 0.7,
main = 'aldagaien ekarpenak')
par(mfrow = c(1, 2))
dotchart(tauli[order(tauli[,3]),3], color = tauli[order(tauli[, 3]),'col'], pch = 20,
main = paste( 'DIM3 aldakuntzen %',
round(arratia.eas.mca$eig[3,2], 2)))
# knitr::kable(t.var.ekarpenak[order(t.var.ekarpenak[, 3],
# decreasing = TRUE), 3][1:50])
dotchart(sort(t.var.ekarpenak[order(t.var.ekarpenak[, 3],
decreasing = TRUE), 3][1:50]), cex = 0.7,
main = 'aldagaien ekarpenak')
par(mfrow = c(1, 2))
dotchart(tauli[order(tauli[,4]),4], color = tauli[order(tauli[, 4]),'col'], pch = 20,
main = paste( 'DIM4 aldakuntzen %',
round(arratia.eas.mca$eig[4,2], 2)))
# knitr::kable(t.var.ekarpenak[order(t.var.ekarpenak[, 4],
# decreasing = TRUE), 4][1:50])
dotchart(sort(t.var.ekarpenak[order(t.var.ekarpenak[, 4],
decreasing = TRUE), 4][1:50]), cex = 0.7,
main = 'aldagaien ekarpenak')
par(mfrow = c(1, 1))
## Mapa(k) (?)